STMトピックモデル
Stucked Area plot
library(stm)
library(dplyr)
library(tidyverse)
library(tidytext)
library(tidystm)
#devtools::install_github("mikajoh/tidystm")
library(ggplot2)
library(ggthemes)
library(sunburstR)
model <- readRDS("data/model_12.obj")
meta <- readRDS("data/meta.obj")
raw <- read.csv("data/tiabList.csv")
data <- na.omit(raw)
prep3 <-stm:: estimateEffect(c(1:12) ~ poly(py,3), model, metadata=meta)
td_beta<-tidy(model)
library(RColorBrewer)
library(plotly)
library(tidystm)
cols<-brewer.pal(12, "Set3")
effect <- extract.estimateEffect(prep3, 'py',
model = model,
method="pointestimate",
labeltype="prob")
effect$covariate.value<-as.numeric(effect$covariate.value)
effect%>%filter(covariate.value>1990)->effect2
names(effect2)[2]<-"Topic"
effect2$Topic<-as.factor(effect2$Topic)
g = ggplot(effect2, aes( x = covariate.value,
y = estimate, fill = Topic))
g = g + geom_area(col="white")+
ylim(0,1.10)+
xlab("time")+
ylab("Relative topic prevalence")+
scale_fill_brewer(palette = "Set3")+
theme_minimal()+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
#library(htmlwidgets)
plotly::ggplotly(g)
#htmlwidgets::saveWidget(l, "stucked.html")
Sunburst plot
top_terms <- td_beta %>%
arrange(beta) %>%
group_by(topic) %>%
top_n(30, beta) %>% arrange(topic,beta)
sequence<-paste0("Topic ",top_terms$topic,"-",top_terms$term)
dfdf<-data.frame(sequence,prob=top_terms$beta)
dfdf%>%sunburst(explanation =
"function(d){return d.data.name}")->p
p
#htmltools::save_html(p, file = "sunburst.html")
Word Cloud
library(wordcloud2)
library(hwordcloud)
bb<-sort(seq(1,500,12)^3,decreasing = T)
bbn<-length(bb)
topics <- labelTopics(model, 1:12,bbn)
for (i in 1:12) {
topics$frex[i,]
bbn1<-bbn+1
frex <- data.frame(words=topics$frex[i,], n=bbn1-seq(topics$frex[i,]), stringsAsFactors=F)
frex$words <- str_replace_all(frex$words, "_", " ")
bb<-sort(seq(0.1,2,0.07),decreasing = T)
clouds <- data.frame(words = c(frex$words),
weight = c(frex$n*bb))
assign(paste0("wc_", i), hwordcloud(text = clouds$word,
size = clouds$weight,
width = "100%",
height = "200px",
theme = "darkblue"))
}
wc_1
wc_2
wc_3
wc_4
wc_5
wc_6
wc_7
wc_8
wc_9
wc_10
wc_11
wc_12
LDAvis
temp<-textProcessor(documents=data$ab,metadata=meta)
## Building corpus...
## Converting to Lower Case...
## Removing punctuation...
## Removing stopwords...
## Removing numbers...
## Stemming...
## Creating Output...
toLDAvis(model,temp$documents,out.dir = "jsn")
LDAvis